*-------------------------------------------------------------------------------
*						Appendix IV Table 1
*-------------------------------------------------------------------------------

** Session setup: project directories, working directory and log file

// Never pause output waiting for a keypress.
set more off

// Project directories. Globals persist for the rest of the Stata session.
global Raw_data   "G:\project-finished\Descriptive\Data"
global App_data   "G:\project-finished\Descriptive\Appendix Data"
global Class_data "G:\project-finished\Descriptive\Classification"
global Work_lab   "G:\project-finished\Descriptive\Lab"
global Out_lab    "G:\project-finished\Descriptive\Out"

// All relative file names used below resolve against the Lab directory.
cd "$Work_lab"

// Close any log left open by an earlier run (no error if none is open),
// then start a fresh log in the output directory, overwriting the old one.
cap log close
log using "$Out_lab\Appendix IV Table 1", replace


*-------------------------------------------------------------------------------
*					Step1: Generate Data
*-------------------------------------------------------------------------------
// For each census wave (2000, 2010, 2015) this section builds one row per
// (consistent, title_consistent) x (new, new_title) cell and appends it to
// tabledata.dta in the working directory.
// Judging from the derived variable names (occ_pop, ind_share), consistent
// is presumably an occupation code and new an industry code - confirm
// against the census_consistent files.
// Requires the community-contributed command unique to be installed.
clear all

// Empty accumulator file so that every wave can be appended to it.
set obs 0
save tabledata, replace emptyok

foreach j in "2000" "2010" "2015" {
	use "`j'census_consistent.dta", clear

	// Generate a new id; it only serves as something to count below.
	gen id_new = _n

	// Head count per cell: after the collapse id_new holds the number of
	// input rows in each (consistent, new) combination.
	collapse (count) id_new, by(consistent title_consistent new new_title)

	// Sector from the first two characters of new, read as a number.
	// Stata treats missing as larger than any number, so the last rule
	// needs an explicit !missing() guard; without it, codes that are not
	// numeric would silently be labelled as the third sector.
	tempvar code2
	gen double `code2' = real(substr(new, 1, 2))
	gen sector = ""
	replace sector = "农"   if `code2' <= 5
	replace sector = "工"   if `code2' > 5  & `code2' <= 59
	replace sector = "服务" if `code2' > 59 & !missing(`code2')
	drop `code2'

	// variety: number of cells with a non-missing new within each
	// (consistent, title_consistent) group.
	bysort consistent title_consistent: egen variety = count(new)

	// variety_share: variety as a percentage of the number of distinct
	// values of new in this wave, as returned by unique in r(unique).
	unique new
	gen variety_share = variety / (`r(unique)') * 100

	// ind_share: the cell's share (percent) of its group's total head count.
	bysort consistent title_consistent: egen occ_pop = total(id_new)
	gen ind_share = id_new / occ_pop * 100

	// sec_share: the sector's share (percent) of its group's total head count.
	bysort consistent title_consistent sector: egen sector_pop = total(id_new)
	gen sec_share = sector_pop / occ_pop * 100

	// Largest single-cell share and largest sector share within each group.
	bysort consistent title_consistent: egen max_ind = max(ind_share)
	bysort consistent title_consistent: egen max_sec = max(sec_share)

	keep consistent title_consistent new new_title sector ind_share sec_share variety_share variety max_ind max_sec
	duplicates drop

	// Numeric wave identifier.
	gen year = `j'

	// Stack this wave on top of the waves processed so far.
	append using tabledata
	save tabledata, replace
}


*-------------------------------------------------------------------------------
*					Step2: Estimation for main paper [Page 6]
*-------------------------------------------------------------------------------
// Per-year averages of variety, variety_share, the two HHI measures and the
// two maximum shares, computed from tabledata.dta built in Step1.
clear all

use tabledata, clear

// HHI over cells: squared cell share (ind_share is in percent).
gen o_share_jt = (ind_share/100)^2

// HHI over sectors: sec_share is repeated on every cell of a sector, so only
// the first row of each group-sector-year contributes its squared share and
// every other row contributes zero.
bysort consistent title_consistent sector year: gen byte first_in_sector = (_n == 1)
gen o_share_st = cond(first_in_sector, (sec_share/100)^2, 0)

bysort year consistent title_consistent: egen HHI_ind = total(o_share_jt)
bysort year consistent title_consistent: egen HHI_sec = total(o_share_st)

// NOTE(review): the mean is taken over cell-level rows, so a group with
// many cells carries more weight than a group with few. The appendix section
// deduplicates to one row per group before summarising - confirm that the
// cell-weighted mean is what the main-paper table intends.
collapse (mean) variety variety_share HHI* max_ind max_sec, by(year)

// Write the result to the log; the next section clears memory, so without
// this listing the per-year table would be computed and then discarded.
list, noobs


*-------------------------------------------------------------------------------
*					Step3: Estimation for appendix
*-------------------------------------------------------------------------------
// Per-year percentiles (1, 20, 40, 50, 60, 80, 99) of variety, variety_share,
// max_ind and max_sec, listed as one table with a row per year and percentile.
clear all

// Empty accumulator file for the per-variable percentile results.
set obs 0
save table, replace emptyok

use tabledata, clear

// Reduce to one row per (consistent, title_consistent, year) with its
// group-level measures.
keep consistent title_consistent variety variety_share max_ind max_sec year
duplicates drop

// Variables are named explicitly so the loop does not depend on the order
// in which they happen to be stored in the dataset.
foreach var in variety variety_share max_ind max_sec {
	preserve

	// centile returns the requested percentiles as r(c_1) ... r(c_7);
	// statsby collects them into one row per year.
	statsby, by(year) clear: centile `var', centile(1, 20, 40, 50, 60, 80, 99)
	keep year c_*

	// Go long: one row per year and percentile position (pctile = 1..7).
	rename c_* `var'*
	reshape long `var', i(year) j(pctile)
	rename `var' value
	gen index = "`var'"

	append using table
	save table, replace
	restore
}

// Back to wide: one column per measure, one row per year and percentile.
use table, clear
reshape wide value, i(year pctile) j(index) string
rename value* *
format variety %9.0f
format max_ind max_sec variety_share %9.2f

// Define the percentile names: position 1..7 maps to the label of the
// percentile requested at that position in the centile() list above.
gen location = word("1st 20th 40th 50th 60th 80th 99th", pctile)
drop pctile
order year location variety variety_share max_ind max_sec

list _all

// Remove the intermediate files created by this do-file.
erase tabledata.dta
erase table.dta

log close



